# Survey Processing Functions
# Alex F. Gazmararian
# agazmararian@gmail.com

#' Clean address strings for geocoding
#'
#' Collapses whitespace, strips `#` and `*`, and rewrites apartment/suite
#' designators ("apt", "apartment", "unit", "ste", "suite", in any letter
#' case, with an optional trailing period) to the single form "Unit ".
#'
#' @param address Character vector of addresses
#' @return Character vector of cleaned addresses, the same length as
#'   `address`; `NA` inputs stay `NA`.
clean_address <- function(address) {
  address <- stringr::str_squish(address)

  # Remove common problematic characters
  address <- stringr::str_replace_all(address, "[#\\*]", "")

  # Standardize apartment/unit indicators.
  # - Longer alternatives come first so "apartment" is consumed whole rather
  #   than as "apt" + "artment".
  # - The negative lookahead rejects matches that are only the prefix of a
  #   longer word ("United", "Steele"), while still allowing a digit to
  #   follow directly ("apt4").
  unit_pattern <- stringr::regex(
    "\\b(apartment|apt|unit|suite|ste)(?!\\p{L})\\s*\\.?\\s*",
    ignore_case = TRUE
  )
  address <- stringr::str_replace_all(address, unit_pattern, "Unit ")

  # Remove extra whitespace left behind by the replacements above
  stringr::str_squish(address)
}

#' Validate state codes
#'
#' Vectorized membership test against the 50 US state abbreviations, the 50
#' full state names, and "DC". Matching is exact (case-sensitive). Missing
#' values and the territory codes AS, GU, MP, PR and VI are reported as
#' invalid.
#'
#' @param state Character vector of state codes or names
#' @return Logical vector the same length as `state` indicating valid
#'   states; never `NA`.
validate_state <- function(state) {
  # Territories (excluding DC) are deliberately not accepted
  excluded_territories <- c("AS", "GU", "MP", "PR", "VI")

  # Valid US states (abbreviation or full name) plus DC
  valid_states <- setdiff(
    c(state.abb, state.name, "DC"),
    excluded_territories
  )

  # `%in%` is elementwise and returns FALSE (not NA) for missing values, so
  # no scalar `if` guards are needed and zero-length input yields logical(0).
  state %in% valid_states
}

#' Convert state names to abbreviations
#'
#' Case-insensitively maps full state names (plus "District of Columbia") to
#' their two-letter abbreviations. Values that are already an abbreviation,
#' in any letter case, are returned upper-cased. Anything that matches
#' neither a name nor an abbreviation is returned unchanged, and `NA` stays
#' `NA`.
#'
#' @param state_name Character vector of state names
#' @return Character vector of state abbreviations, the same length as
#'   `state_name`, carrying over any names on the input.
convert_state_name <- function(state_name) {
  lookup_names <- c(state.name, "District of Columbia")
  lookup_abbs <- c(state.abb, "DC")

  original <- as.character(state_name)
  key <- tolower(original)

  # Look up full names first, then fall back to abbreviations. No lower-cased
  # abbreviation equals a lower-cased name, so the order cannot change the
  # result; NA keys match nothing and fall through untouched.
  idx <- match(key, tolower(lookup_names))
  unresolved <- is.na(idx)
  idx[unresolved] <- match(key[unresolved], tolower(lookup_abbs))

  # Return original value where no match was found
  result <- original
  matched <- !is.na(idx)
  result[matched] <- lookup_abbs[idx[matched]]

  names(result) <- names(state_name)
  result
}
